'\"
'\" Generated from file 'idx_parse\&.man' by tcllib/doctools with format 'nroff'
'\" Copyright (c) 2009 Andreas Kupries <andreas_kupries@users\&.sourceforge\&.net>
'\"
.TH "doctools::idx::parse" n 1 tcllib "Documentation tools"
.\" The -*- nroff -*- definitions below are for supplemental macros used
.\" in Tcl/Tk manual entries.
.\"
.\" .AP type name in/out ?indent?
.\"	Start paragraph describing an argument to a library procedure.
.\"	type is type of argument (int, etc.), in/out is either "in", "out",
.\"	or "in/out" to describe whether procedure reads or modifies arg,
.\"	and indent is equivalent to second arg of .IP (shouldn't ever be
.\"	needed;  use .AS below instead)
.\"
.\" .AS ?type? ?name?
.\"	Give maximum sizes of arguments for setting tab stops.  Type and
.\"	name are examples of largest possible arguments that will be passed
.\"	to .AP later.  If args are omitted, default tab stops are used.
.\"
.\" .BS
.\"	Start box enclosure.  From here until next .BE, everything will be
.\"	enclosed in one large box.
.\"
.\" .BE
.\"	End of box enclosure.
.\"
.\" .CS
.\"	Begin code excerpt.
.\"
.\" .CE
.\"	End code excerpt.
.\"
.\" .VS ?version? ?br?
.\"	Begin vertical sidebar, for use in marking newly-changed parts
.\"	of man pages.  The first argument is ignored and used for recording
.\"	the version when the .VS was added, so that the sidebars can be
.\"	found and removed when they reach a certain age.  If another argument
.\"	is present, then a line break is forced before starting the sidebar.
.\"
.\" .VE
.\"	End of vertical sidebar.
.\"
.\" .DS
.\"	Begin an indented unfilled display.
.\"
.\" .DE
.\"	End of indented unfilled display.
.\"
.\" .SO ?manpage?
.\"	Start of list of standard options for a Tk widget. The manpage
.\"	argument defines where to look up the standard options; if
.\"	omitted, defaults to "options". The options follow on successive
.\"	lines, in three columns separated by tabs.
.\"
.\" .SE
.\"	End of list of standard options for a Tk widget.
.\"
.\" .OP cmdName dbName dbClass
.\"	Start of description of a specific option.  cmdName gives the
.\"	option's name as specified in the class command, dbName gives
.\"	the option's name in the option database, and dbClass gives
.\"	the option's class in the option database.
.\"
.\" .UL arg1 arg2
.\"	Print arg1 underlined, then print arg2 normally.
.\"
.\" .QW arg1 ?arg2?
.\"	Print arg1 in quotes, then arg2 normally (for trailing punctuation).
.\"
.\" .PQ arg1 ?arg2?
.\"	Print an open parenthesis, arg1 in quotes, then arg2 normally
.\"	(for trailing punctuation) and then a closing parenthesis.
.\"
.\"	# Set up traps and other miscellaneous stuff for Tcl/Tk man pages.
.\"	In troff mode only, plant a trap 1.3i above the bottom of the page
.\"	that runs the ^B macro (defined further down in this file).
.if t .wh -1.3i ^B
.\"	Remember the line length in effect at load time in register ^l;
.\"	the box and sidebar macros use it to position their rules.
.nr ^l \n(.l
.\"	Adjust text at both margins.
.ad b
.\"	# Start an argument description
.\"	Usage: .AP type name in/out ?indent?
.\"	Starts a tagged paragraph (.TP) for one argument. The tag line
.\"	shows the type, the name in italics and, when given, the in/out
.\"	marker in parentheses. Registers )A, )B and )C are set by .AS.
.de AP
.\"	Indent: an explicit 4th argument wins; otherwise register )C is
.\"	used when a name was given, or a fixed 15 when it was not.
.ie !"\\$4"" .TP \\$4
.el \{\
.   ie !"\\$2"" .TP \\n()Cu
.   el          .TP 15
.\}
.\"	Tab stops for the tag line come from registers )A and )B.
.ta \\n()Au \\n()Bu
.\"	With a 3rd argument: type, italic name, marker in parentheses.
.ie !"\\$3"" \{\
\&\\$1 \\fI\\$2\\fP (\\$3)
.\".b
.\}
.el \{\
.\"	Without a 3rd argument: break, then print type and italic name,
.\"	or only the 1st argument in italics when no name was given.
.br
.ie !"\\$2"" \{\
\&\\$1	\\fI\\$2\\fP
.\}
.el \{\
\&\\fI\\$1\\fP
.\}
.\}
..
.\"	# define tabbing values for .AP
.\"	Usage: .AS ?type? ?name?
.\"	Computes the registers that .AP uses for tab stops and indent:
.\"	  )A  first tab stop: width of the sample type + 3n (default 10n)
.\"	  )B  second tab stop: )A + width of the sample name + 3n
.\"	      (default )A + 15n)
.\"	  )C  paragraph indent: )B + width of the text (in/out) + 2n
.de AS
.nr )A 10n
.if !"\\$1"" .nr )A \\w'\\$1'u+3n
.nr )B \\n()Au+15n
.\"
.if !"\\$2"" .nr )B \\w'\\$2'u+\\n()Au+3n
.nr )C \\n()Bu+\\w'(in/out)'u+2n
..
.\"	Establish initial tab stops from sample type and name arguments.
.\"	Only the first two arguments are read by .AS; the third is unused.
.AS Tcl_Interp Tcl_CreateInterp in/out
.\"	# BS - start boxed text
.\"	# ^y = starting y location
.\"	# ^b = 1
.\"	Usage: .BS   (no arguments)
.\"	Opens a box: breaks the line, records the current vertical
.\"	position in ^y and sets ^b to 1 to flag that a box is open.
.\"	In nroff mode a horizontal rule as wide as the current line
.\"	length is drawn as the top of the box.
.de BS
.br
.mk ^y
.nr ^b 1u
.\"	nroff only: rule is drawn in no-fill mode with no indent,
.\"	then fill mode is switched back on.
.if n .nf
.if n .ti 0
.if n \l'\\n(.lu\(ul'
.if n .fi
..
.\"	# BE - end boxed text (draw box now)
.\"	Usage: .BE   (no arguments)
.\"	Closes the box opened by .BS. The current vertical position is
.\"	recorded in ^t. In nroff mode a horizontal rule of length ^l is
.\"	drawn. In troff mode the box is drawn from ^y (set by .BS, or by
.\"	^B after a page break) down to ^t. Finally ^b is reset to 0.
.de BE
.nf
.ti 0
.mk ^t
.ie n \l'\\n(^lu\(ul'
.el \{\
.\"	Draw four-sided box normally, but don't draw top of
.\"	box if the box started on an earlier page.
.\"	^b is 1 while the box is still on its first page; ^B sets it
.\"	to 2 once a page break has occurred inside the box.
.ie !\\n(^b-1 \{\
\h'-1.5n'\L'|\\n(^yu-1v'\l'\\n(^lu+3n\(ul'\L'\\n(^tu+1v-\\n(^yu'\l'|0u-1.5n\(ul'
.\}
.\"	The else branch must open its block with an opening brace so
.\"	that it pairs with the closing brace two lines below.
.el \{\
\h'-1.5n'\L'|\\n(^yu-1v'\h'\\n(^lu+3n'\L'\\n(^tu+1v-\\n(^yu'\l'|0u-1.5n\(ul'
.\}
.\}
.fi
.br
.nr ^b 0
..
.\"	# VS - start vertical sidebar
.\"	# ^Y = starting y location
.\"	# ^v = 1 (for troff;  for nroff this doesn't matter)
.\"	Usage: .VS ?version? ?br?
.\"	Starts a change sidebar. The 1st argument is not used by the
.\"	macro; a non-empty 2nd argument forces a line break first.
.\"	The starting vertical position is saved in ^Y. In nroff mode
.\"	the margin character is switched on; in troff mode ^v is set
.\"	to 1 so that .VE and ^B know a sidebar is open.
.de VS
.if !"\\$2"" .br
.mk ^Y
.ie n 'mc \s12\(br\s0
.el .nr ^v 1u
..
.\"	# VE - end of vertical sidebar
.\"	Usage: .VE   (no arguments)
.\"	Ends a change sidebar. In nroff mode the margin character is
.\"	switched off. In troff mode the bar is drawn in environment 2,
.\"	to the right of the text at ^l + 3n, covering the span from ^Y
.\"	(saved by .VS or ^B) to the current position ^t. ^v is then
.\"	reset to 0.
.de VE
.ie n 'mc
.el \{\
.ev 2
.nf
.ti 0
.mk ^t
\h'|\\n(^lu+3n'\L'|\\n(^Yu-1v\(bv'\v'\\n(^tu+1v-\\n(^Yu'\h'-|\\n(^lu+3n'
.\"	Step back one line so the drawing line takes no vertical space.
.sp -1
.fi
.ev
.\}
.nr ^v 0
..
.\"	# Special macro to handle page bottom:  finish off current
.\"	# box/sidebar if in box/sidebar mode, then invoked standard
.\"	# page bottom macro.
.\"	Page-bottom trap macro (installed with .wh in troff mode only).
.\"	Before the page break it draws the part of an open box (^b set)
.\"	and of an open sidebar (^v set) that lies on the current page.
.\"	After the break it re-marks the start positions at the top of
.\"	the new page so .BE, .VE or the next ^B can continue from there.
.de ^B
.\"	Work in environment 2 so the interrupted text is not disturbed.
.ev 2
'ti 0
'nf
.mk ^t
.if \\n(^b \{\
.\"	Draw three-sided box if this is the box's first page,
.\"	draw two sides but no top otherwise.
.ie !\\n(^b-1 \h'-1.5n'\L'|\\n(^yu-1v'\l'\\n(^lu+3n\(ul'\L'\\n(^tu+1v-\\n(^yu'\h'|0u'\c
.el \h'-1.5n'\L'|\\n(^yu-1v'\h'\\n(^lu+3n'\L'\\n(^tu+1v-\\n(^yu'\h'|0u'\c
.\}
.if \\n(^v \{\
.\"	^x is the height of the sidebar segment on this page; the
.\"	vertical line is drawn at horizontal position ^l + 3n.
.nr ^x \\n(^tu+1v-\\n(^Yu
\kx\h'-\\nxu'\h'|\\n(^lu+3n'\ky\L'-\\n(^xu'\v'\\n(^xu'\h'|0u'\c
.\}
.bp
'fi
.ev
.\"	Box continues on the new page: restart it here and set ^b to 2
.\"	so that the top edge is not drawn again.
.if \\n(^b \{\
.mk ^y
.nr ^b 2
.\}
.\"	Sidebar continues on the new page: restart it here.
.if \\n(^v \{\
.mk ^Y
.\}
..
.\"	# DS - begin display
.\"	Usage: .DS   (no arguments)
.\"	Starts a display: opens a relative indent (.RS), switches to
.\"	no-fill mode and inserts one blank line. Closed by .DE.
.de DS
.RS
.nf
.sp
..
.\"	# DE - end display
.\"	Usage: .DE   (no arguments)
.\"	Ends a display started by .DS: restores fill mode, closes the
.\"	relative indent (.RE) and inserts one blank line.
.de DE
.fi
.RE
.sp
..
.\"	# SO - start of list of standard options
.\"	Usage: .SO ?manpage?
.\"	Starts the STANDARD OPTIONS section. The name of the manual
.\"	entry to refer to is stored in bold in the string So, for use
.\"	by .SE; it is "options" when no argument is given. The option
.\"	names that follow are set unfilled, in bold, with tab stops at
.\"	5.5c and 11c.
.de SO
'ie '\\$1'' .ds So \\fBoptions\\fR
'el .ds So \\fB\\$1\\fR
.SH "STANDARD OPTIONS"
.LP
.nf
.ta 5.5c 11c
.ft B
..
.\"	# SE - end of list of standard options
.\"	Usage: .SE   (no arguments)
.\"	Ends the option list started by .SO: restores fill mode and the
.\"	roman font, then prints a pointer to the manual entry whose
.\"	name .SO stored in the string So.
.de SE
.fi
.ft R
.LP
See the \\*(So manual entry for details on the standard options.
..
.\"	# OP - start of full description for a single option
.\"	Usage: .OP cmdName dbName dbClass
.\"	Prints a three-line header for one option, with each label
.\"	followed by a tab (stop at 4c) and the matching argument in
.\"	bold. The header is set unfilled; afterwards fill mode is
.\"	restored and an indented paragraph (.IP) is opened for the
.\"	description text that follows the macro call.
.de OP
.LP
.nf
.ta 4c
Command-Line Name:	\\fB\\$1\\fR
Database Name:	\\fB\\$2\\fR
Database Class:	\\fB\\$3\\fR
.fi
.IP
..
.\"	# CS - begin code excerpt
.\"	Usage: .CS   (no arguments)
.\"	Starts a code excerpt: opens a relative indent (.RS), switches
.\"	to no-fill mode and sets tab stops every quarter inch up to 1i.
.\"	Closed by .CE.
.de CS
.RS
.nf
.ta .25i .5i .75i 1i
..
.\"	# CE - end code excerpt
.\"	Usage: .CE   (no arguments)
.\"	Ends a code excerpt started by .CS: restores fill mode and
.\"	closes the relative indent (.RE).
.de CE
.fi
.RE
..
.\"	# UL - underline word
.\"	Usage: .UL arg1 arg2
.\"	Prints arg1, draws an underline rule from the end of arg1 back
.\"	to horizontal position 0, then prints arg2 directly after arg1.
.\"	NOTE(review): the rule is drawn to absolute position 0, so this
.\"	presumably expects arg1 to start at the left margin - confirm.
.de UL
\\$1\l'|0\(ul'\\$2
..
.\"	# QW - apply quotation marks to word
.\"	Usage: .QW arg1 ?arg2?
.\"	Prints arg1 in quotation marks, immediately followed by arg2.
.\"	If the string lq is a plain double quote, the two-character
.\"	ASCII open and close quotes are used instead of lq and rq.
.de QW
.ie '\\*(lq'"' ``\\$1''\\$2
.\"" fix emacs highlighting
.el \\*(lq\\$1\\*(rq\\$2
..
.\"	# PQ - apply parens and quotation marks to word
.\"	Usage: .PQ arg1 ?arg2? ?arg3?
.\"	Prints an open parenthesis, arg1 in quotation marks, arg2, a
.\"	closing parenthesis, and then arg3 after the parenthesis.
.\"	Quote selection follows the same lq test as in .QW.
.de PQ
.ie '\\*(lq'"' (``\\$1''\\$2)\\$3
.\"" fix emacs highlighting
.el (\\*(lq\\$1\\*(rq\\$2)\\$3
..
.\"	# QR - quoted range
.\"	Usage: .QR arg1 arg2 ?arg3?
.\"	Prints a quoted range: arg1 in quotation marks, a minus sign,
.\"	arg2 in quotation marks, then arg3 directly afterwards.
.\"	Quote selection follows the same lq test as in .QW.
.de QR
.ie '\\*(lq'"' ``\\$1''\\-``\\$2''\\$3
.\"" fix emacs highlighting
.el \\*(lq\\$1\\*(rq\\-\\*(lq\\$2\\*(rq\\$3
..
.\"	# MT - "empty" string
.\"	Usage: .MT   (no arguments)
.\"	Prints an empty pair of quotation marks by calling .QW with an
.\"	empty first argument.
.de MT
.QW ""
..
.BS
.SH NAME
doctools::idx::parse \- Parsing text in docidx format
.SH SYNOPSIS
package require \fBdoctools::idx::parse  ?0\&.1?\fR
.sp
package require \fBTcl  8\&.4\fR
.sp
package require \fBdoctools::idx::structure \fR
.sp
package require \fBdoctools::msgcat \fR
.sp
package require \fBdoctools::tcl::parse \fR
.sp
package require \fBfileutil \fR
.sp
package require \fBlogger \fR
.sp
package require \fBsnit \fR
.sp
package require \fBstruct::list \fR
.sp
package require \fBstruct::stack \fR
.sp
\fB::doctools::idx::parse\fR \fBtext\fR \fItext\fR
.sp
\fB::doctools::idx::parse\fR \fBfile\fR \fIpath\fR
.sp
\fB::doctools::idx::parse\fR \fBincludes\fR
.sp
\fB::doctools::idx::parse\fR \fBinclude add\fR \fIpath\fR
.sp
\fB::doctools::idx::parse\fR \fBinclude remove\fR \fIpath\fR
.sp
\fB::doctools::idx::parse\fR \fBinclude clear\fR
.sp
\fB::doctools::idx::parse\fR \fBvars\fR
.sp
\fB::doctools::idx::parse\fR \fBvar set\fR \fIname\fR \fIvalue\fR
.sp
\fB::doctools::idx::parse\fR \fBvar unset\fR \fIname\fR
.sp
\fB::doctools::idx::parse\fR \fBvar clear\fR ?\fIpattern\fR?
.sp
.BE
.SH DESCRIPTION
This package provides commands to parse text written in the
\fIdocidx\fR markup language and convert it into the canonical
serialization of the keyword index encoded in the text\&.
See the section \fBKeyword index serialization format\fR for
specification of that format\&.
.PP
This is an internal package of doctools, for use by the higher level
packages handling \fIdocidx\fR documents\&.
.SH API
.TP
\fB::doctools::idx::parse\fR \fBtext\fR \fItext\fR
The command takes the string contained in \fItext\fR and parses it
under the assumption that it contains a document written using the
\fIdocidx\fR markup language\&. An error is thrown if this assumption
is found to be false\&. The format of these errors is described in
section \fBParse errors\fR\&.
.sp
When successful the command returns the canonical serialization of the
keyword index which was encoded in the text\&.
See the section \fBKeyword index serialization format\fR for
specification of that format\&.
.TP
\fB::doctools::idx::parse\fR \fBfile\fR \fIpath\fR
The same as \fBtext\fR, except that the text to parse is read from
the file specified by \fIpath\fR\&.
.TP
\fB::doctools::idx::parse\fR \fBincludes\fR
This method returns the current list of search paths used when looking
for include files\&.
.TP
\fB::doctools::idx::parse\fR \fBinclude add\fR \fIpath\fR
This method adds the \fIpath\fR to the list of paths searched when
looking for an include file\&. The call is ignored if the path is
already in the list of paths\&. The method returns the empty string as
its result\&.
.TP
\fB::doctools::idx::parse\fR \fBinclude remove\fR \fIpath\fR
This method removes the \fIpath\fR from the list of paths searched
when looking for an include file\&. The call is ignored if the path is
not contained in the list of paths\&. The method returns the empty
string as its result\&.
.TP
\fB::doctools::idx::parse\fR \fBinclude clear\fR
This method clears the list of search paths for include files\&.
.TP
\fB::doctools::idx::parse\fR \fBvars\fR
This method returns a dictionary containing the current set of
predefined variables known to the \fBvset\fR markup command during
processing\&.
.TP
\fB::doctools::idx::parse\fR \fBvar set\fR \fIname\fR \fIvalue\fR
This method adds the variable \fIname\fR to the set of predefined
variables known to the \fBvset\fR markup command during processing,
and gives it the specified \fIvalue\fR\&. The method returns the empty
string as its result\&.
.TP
\fB::doctools::idx::parse\fR \fBvar unset\fR \fIname\fR
This method removes the variable \fIname\fR from the set of predefined
variables known to the \fBvset\fR markup command during
processing\&. The method returns the empty string as its result\&.
.TP
\fB::doctools::idx::parse\fR \fBvar clear\fR ?\fIpattern\fR?
This method removes all variables matching the \fIpattern\fR from the
set of predefined variables known to the \fBvset\fR markup command
during processing\&. The method returns the empty string as its result\&.
.sp
The pattern matching is done with \fBstring match\fR, and the
default pattern, used when none is specified, is \fB*\fR\&.
.PP
.SH "PARSE ERRORS"
The format of the parse error messages thrown when encountering
violations of the \fIdocidx\fR markup syntax is human readable and
not intended for processing by machines\&. As such it is not documented\&.
.PP
\fIHowever\fR, the errorCode attached to the message is
machine-readable and has the following format:
.IP [1]
The error code will be a list, each element describing a single error
found in the input\&. The list has at least one element, possibly more\&.
.IP [2]
Each error element will be a list containing six strings describing an
error in detail\&. The strings will be
.RS
.IP [1]
The path of the file the error occurred in\&. This may be empty\&.
.IP [2]
The range of the token the error was found at\&. This range is a
two-element list containing the offset of the first and last character
in the range, counted from the beginning of the input (file)\&. Offsets
are counted from zero\&.
.IP [3]
The line the first character after the error is on\&.
Lines are counted from one\&.
.IP [4]
The column the first character after the error is at\&.
Columns are counted from zero\&.
.IP [5]
The message code of the error\&. This value can be used as argument to
\fBmsgcat::mc\fR to obtain a localized error message, assuming that
the application had a suitable call of \fBdoctools::msgcat::init\fR to
initialize the necessary message catalogs (See package
\fBdoctools::msgcat\fR)\&.
.IP [6]
A list of details for the error, like the markup command involved\&. In
the case of message code \fBdocidx/include/syntax\fR this value is
the set of errors found in the included file, using the format
described here\&.
.RE
.PP
.SH "[DOCIDX] NOTATION OF KEYWORD INDICES"
The docidx format for keyword indices, also called the
\fIdocidx markup language\fR, is too large to be covered in a single
section\&.
The interested reader should start with the document
.IP [1]
\fIdocidx language introduction\fR
.PP
and then proceed from there to the formal specifications, i\&.e\&. the
documents
.IP [1]
\fIdocidx language syntax\fR and
.IP [2]
\fIdocidx language command reference\fR
.PP
to get a thorough understanding of the language\&.
.SH "KEYWORD INDEX SERIALIZATION FORMAT"
Here we specify the format used by the doctools v2 packages to
serialize keyword indices as immutable values for transport,
comparison, etc\&.
.PP
We distinguish between \fIregular\fR and \fIcanonical\fR
serializations\&. While a keyword index may have more than one regular
serialization, exactly one of them will be \fIcanonical\fR\&.
.PP
.TP
regular serialization
.RS
.IP [1]
An index serialization is a nested Tcl dictionary\&.
.IP [2]
This dictionary holds a single key, \fBdoctools::idx\fR, and its
value\&. This value holds the contents of the index\&.
.IP [3]
The contents of the index are a Tcl dictionary holding the title of
the index, a label, and the keywords and references\&. The relevant keys
and their values are
.RS
.TP
\fBtitle\fR
The value is a string containing the title of the index\&.
.TP
\fBlabel\fR
The value is a string containing a label for the index\&.
.TP
\fBkeywords\fR
The value is a Tcl dictionary, using the keywords known to the index
as keys\&. The associated values are lists containing the identifiers of
the references associated with that particular keyword\&.
.sp
Any reference identifier used in these lists has to exist as a key in
the \fBreferences\fR dictionary, see the next item for its
definition\&.
.TP
\fBreferences\fR
The value is a Tcl dictionary, using the identifiers for the
references known to the index as keys\&. The associated values are
2-element lists containing the type and label of the reference, in
this order\&.
.sp
Any key here has to be associated with at least one keyword,
i\&.e\&. occur in at least one of the reference lists which are the values
in the \fBkeywords\fR dictionary, see previous item for its
definition\&.
.RE
.IP [4]
The \fItype\fR of a reference can be one of two values,
.RS
.TP
\fBmanpage\fR
The identifier of the reference is interpreted as a symbolic file name,
referring to one of the documents the index was made for\&.
.TP
\fBurl\fR
The identifier of the reference is interpreted as a URL, referring to
some external location, like a website, etc\&.
.RE
.RE
.TP
canonical serialization
The canonical serialization of a keyword index has the format as
specified in the previous item, and then additionally satisfies the
constraints below, which make it unique among all the possible
serializations of the keyword index\&.
.RS
.IP [1]
The keys found in all the nested Tcl dictionaries are sorted in
ascending dictionary order, as generated by Tcl's builtin command
\fBlsort -increasing -dict\fR\&.
.IP [2]
The references listed for each keyword of the index, if any, are
listed in ascending dictionary order of their \fIlabels\fR, as
generated by Tcl's builtin command \fBlsort -increasing -dict\fR\&.
.RE
.PP
.SH "BUGS, IDEAS, FEEDBACK"
This document, and the package it describes, will undoubtedly contain
bugs and other problems\&.
Please report such in the category \fIdoctools\fR of the
\fITcllib Trackers\fR [http://core\&.tcl\&.tk/tcllib/reportlist]\&.
Please also report any ideas for enhancements you may have for either
package and/or documentation\&.
.PP
When proposing code changes, please provide \fIunified diffs\fR,
i\&.e\&. the output of \fBdiff -u\fR\&.
.PP
Note further that \fIattachments\fR are strongly preferred over
inlined patches\&. Attachments can be made by going to the \fBEdit\fR
form of the ticket immediately after its creation, and then using the
left-most button in the secondary navigation bar\&.
.SH KEYWORDS
docidx, doctools, lexer, parser
.SH CATEGORY
Documentation tools
.SH COPYRIGHT
.nf
Copyright (c) 2009 Andreas Kupries <andreas_kupries@users\&.sourceforge\&.net>

.fi
